# Replication script for BLAR article 'Brazilian Regional Leadership Revisited: Testing the Long-Term Determinants of South American Followership (1995-2015)'
## by Rafael Mesquita

# LIBRARIES AND DATA
library(tidyverse)
library(stats)
library(car)
library(lmtest)
library(broom)
library(plm)
library(stargazer)

InOutAmSul.red7 <- read.csv(file="InOutAmsul.red7.csv")




# FORMULAS (not all used)
tscsForm19 <- s3un ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap # with trend, fhGap control, no lags
tscsForm20 <- s3un ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + s3un_lag1 # with trend, fhGap control, lag1
tscsForm21 <- s3un ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + s3un_lag2 # with trend, fhGap control, lag2 
tscsForm22 <- s3un ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + s3un_lag1 + s3un_lag2 # with trend, fhGap control, lag1 + lag2
# tscsForm23 <- s3un ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + s3un_lag1 + s3un_lag2 + arg95 # with trend, fhGap control, lag1 + lag2, dummy for argentina 1995
# tscsForm24 <- s3un ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + s3un_lag1 + s3un_lag2 + arg95 + pry13 + sur15 # with trend, fhGap control, lag1 + lag2, dummies for all 3 deviant cases
tscsForm25 <- s3un ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + s3un_lag1 + s3un_lag2 + arg95 + pry13 + sur15 # same as 24, but ODA in million USD
# 
# tscsForm26 <- s3un ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + s3un_lag1 + s3un_lag2 + arg95 + pry13 + sur15 + as.factor(Partner.PT) # same as 25, but with dummies for all existing countries
# tscsForm27 <- s3un ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + s3un_lag1 + s3un_lag2 + arg95 + pry13 + sur15 + as.factor(Partner.PT) + as.factor(Year) # same as 25, but with bummies for all existing countries and years
# 
tscsForm28 <- s3un ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + s3un_lag1 + s3un_lag2 + arg95 + pry13 + sur15 + RI # same as 25, but with a Region of Interest variable for AmLat
# 
# tscsForm29 <- perc_spon_draft ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + perc_spon_draft_lag1 + perc_spon_draft_lag2 # same as 24, no country dummies, DV and LDVs are on draft co-sponsorship
# tscsForm30 <- perc_spon_draft ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + perc_spon_draft_lag1 # ldv1
# tscsForm31 <- perc_spon_draft ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap  # no lags
tscsForm32 <- n_spon_draft ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + n_spon_draft_lag1 + n_spon_draft_lag2 # absolute number of drafts, lag1+2
tscsForm33 <- n_spon_draft ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + n_spon_draft_lag1
tscsForm34 <- n_spon_draft ~ ln_Trade.Sumd + ODA.Sumd.mi + ln_indiceSh + ln_DepTradeBRd + cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap







# TSCS MODELS

tscs.InOutAmSul <- pdata.frame(InOutAmSul.red7, index=c("Partner.PT", "Year"))

## Pooled OLS
tscs.InOutAmSul.pool.c <-  plm(tscsForm19,data = tscs.InOutAmSul, model = "pooling")

### Test for AR1 auto-correlation (Wooldridge)
tscs.InOutAmSul.pool.c.rs <- tscs.InOutAmSul.pool.c$residuals
length(tscs.InOutAmSul.pool.c.rs)
tscs.InOutAmSul.pool.c.rs2 <- cbind(tscs.InOutAmSul, tscs.InOutAmSul.pool.c.rs)
colnames(tscs.InOutAmSul.pool.c.rs2)[34] <- "res"
tscs.InOutAmSul.pool.c.rs2 <- pdata.frame(tscs.InOutAmSul.pool.c.rs2, index = c("Partner.PT", "Year"))
tscs.InOutAmSul.pool.c.rs2p <- plm(res ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + 
                                     cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + lag(res, k=1),
                                   data = tscs.InOutAmSul.pool.c.rs2, model = "pooling")
summary(tscs.InOutAmSul.pool.c.rs2p) # lag1 for res shows auto-correlation
stargazer(tscs.InOutAmSul.pool.c.rs2p, title = "Teste Corr. Serial (Eq.1, lag 0)",
          type="text", align = T, decimal.mark = ",", digit.separator = ".") # replicates table on Annex p.5

## Pooled OLS with lag1
tscs.InOutAmSul.pool.l1.c <-  plm(tscsForm20,data = tscs.InOutAmSul, model = "pooling")

### Test for AR1
tscs.InOutAmSul.pool.l1.c.rs <- tscs.InOutAmSul.pool.l1.c$residuals
length(tscs.InOutAmSul.pool.l1.c.rs)
tscs.InOutAmSul.pool.l1.c.rs2 <- cbind(tscs.InOutAmSul, tscs.InOutAmSul.pool.l1.c.rs)
colnames(tscs.InOutAmSul.pool.l1.c.rs2)[34] <- "res"
tscs.InOutAmSul.pool.l1.c.rs2 <- pdata.frame(tscs.InOutAmSul.pool.l1.c.rs2, index = c("Partner.PT", "Year"))
tscs.InOutAmSul.pool.l1.c.rs2p <- plm(res ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + 
                                        cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + 
                                        s3un_lag1 + lag(res, k=1),
                                      data = tscs.InOutAmSul.pool.l1.c.rs2, model = "pooling")
summary(tscs.InOutAmSul.pool.l1.c.rs2p) # lag1 for res shows auto-correlation

## Pooled OLS with lag2 (no lag1)
tscs.InOutAmSul.pool.l2.c <-  plm(tscsForm21,data = tscs.InOutAmSul, model = "pooling")

### Test for AR1
tscs.InOutAmSul.pool.l2.c.rs <- tscs.InOutAmSul.pool.l2.c$residuals
length(tscs.InOutAmSul.pool.l2.c.rs)
tscs.InOutAmSul.pool.l2.c.rs2 <- cbind(tscs.InOutAmSul, tscs.InOutAmSul.pool.l2.c.rs)
colnames(tscs.InOutAmSul.pool.l2.c.rs2)[34] <- "res"
tscs.InOutAmSul.pool.l2.c.rs2 <- pdata.frame(tscs.InOutAmSul.pool.l2.c.rs2, index = c("Partner.PT", "Year"))
tscs.InOutAmSul.pool.l2.c.rs2p <- plm(res ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + 
                                        cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + 
                                        s3un_lag2 + lag(res, k=1),
                                      data = tscs.InOutAmSul.pool.l2.c.rs2, model = "pooling")
summary(tscs.InOutAmSul.pool.l2.c.rs2p) # lag1 for res shows auto-correlation

## Pooled OLS with lag1 + 2
tscs.InOutAmSul.pool.l12.c <-  plm(tscsForm22,data = tscs.InOutAmSul, model = "pooling")
summary(tscs.InOutAmSul.pool.l12.c)

### Test for AR1
tscs.InOutAmSul.pool.l12.c.rs <- tscs.InOutAmSul.pool.l12.c$residuals
length(tscs.InOutAmSul.pool.l12.c.rs)
tscs.InOutAmSul.pool.l12.c.rs2 <- cbind(tscs.InOutAmSul, tscs.InOutAmSul.pool.l12.c.rs)
colnames(tscs.InOutAmSul.pool.l12.c.rs2)[34] <- "res"
tscs.InOutAmSul.pool.l12.c.rs2 <- pdata.frame(tscs.InOutAmSul.pool.l12.c.rs2, index = c("Partner.PT", "Year"))
tscs.InOutAmSul.pool.l12.c.rs2p <- plm(res ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + 
                                         cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + 
                                         s3un_lag1 + s3un_lag2 + lag(res, k=1),
                                       data = tscs.InOutAmSul.pool.l12.c.rs2, model = "pooling")
summary(tscs.InOutAmSul.pool.l12.c.rs2p) # no serial correlation

### Compare lagged models
stargazer(tscs.InOutAmSul.pool.l1.c.rs2p,
          tscs.InOutAmSul.pool.l2.c.rs2p,
          tscs.InOutAmSul.pool.l12.c.rs2p, title = "Teste Corr. Serial (Eq.2 e lags)",
          column.labels = c("lag1", "lag2", "lag1+2"),
          type="text", align = T, decimal.mark = ",", digit.separator = ".") # replicates table on Annex p.6

## Fixed Effects with lag1 + 2
tscs.InOutAmSul.fe.l12.c <-  plm(tscsForm22,data = tscs.InOutAmSul, model = "within")

### Auto-correlation test for FE
pwartest(tscs.InOutAmSul.fe.l12.c) # replicates tests on Annex p.8

## Pooled OLS vs FE
### F test, Breusch-Pagan test # replicates tests on Annex p.8
pFtest(tscs.InOutAmSul.fe.l12.c, tscs.InOutAmSul.pool.l12.c)
plmtest(tscs.InOutAmSul.fe.l12.c, effect = "individual", type = "bp")







# DIAGNOSTICS
tscs.InOutAmSul.pool.l12.c.man <-  lm(tscsForm22,data = InOutAmSul.red7) # replica of plm model using lm, so that car package tests are applicable

## Heteroskedasticity
### Breusch-Pagan
bptest(tscs.InOutAmSul.pool.l12.c.man, studentize = F) # H0 rejected, so heteroskedastic. # replicates Annex p.9 test

### Panel heteroskedasticity test
pcdtest(s3un ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + 
          cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + 
          s3un_lag1 + s3un_lag2, data=tscs.InOutAmSul,
        model="pooling",
        test="cd") # replicates Annex p.9 test

### Visual inspection # replicates Annex p.9 figure
plot(rep(1995:2015, each=11), tscs.InOutAmSul.pool.l12.c.man$residuals,
     xlab="Ano", ylab="Res.", main="OLS Res. x Tempo")

plot(droplevels(as.factor(InOutAmSul.red7$Partner.PT)),
     tscs.InOutAmSul.pool.l12.c.man$residuals, cex.axis=0.55,
     xlab="", ylab="Res.", main="OLS Res. x País")


## Outliers and influential observations
influenceIndexPlot(lm(s3un ~ ln_Trade.Sumd + ODA.Sumd + ln_indiceSh + ln_DepTradeBRd + 
                        cincRGap + idDistAbs + PolRiskBR + PolRisk + t + fhGap + 
                        s3un_lag1 + s3un_lag2,
                      data=InOutAmSul.red7),
                   vars=c("Cook", "Studentized", "hat"), id.n = 5, id.cex = 1.5,
                   cex.lab=1.8, grid=F,
                   main="OLS Pontos influentes") # replicates Annex p.10 figure






# TSCS MODELS WITH 3 COUNTRY DUMMIES

## Pooled OLS
tscs.InOutAmSul.pool.l12.c.d <- plm(tscsForm25, data=tscs.InOutAmSul, model="pooling")

### Add BK PCSE
tscs.InOutAmSul.pool.l12.c.d.pcse <- coeftest(tscs.InOutAmSul.pool.l12.c.d,
                                              vcov.=function(x) vcovBK(x, type="HC0",
                                                                       cluster="time"))

## FE (twoways)
tscs.InOutAmSul.fe2.l12.c.d <-  plm(tscsForm25,data = tscs.InOutAmSul, model = "within", effect="twoways")

## First Differences with lag 1+2 and dummies
tscs.InOutAmSul.fd.l12.c.d <-  plm(tscsForm25,data = tscs.InOutAmSul, model = "fd")


# DEFINITIVE TSCS MODELS
stargazer(tscs.InOutAmSul.pool.l12.c.d.pcse,
          tscs.InOutAmSul.fe2.l12.c.d,
          tscs.InOutAmSul.fd.l12.c.d,
          type="text", title="Resultados regressão",
          decimal.mark=",", digit.separator=".",
          column.labels =c("OLS PCSE",
                           "FE (two w.)",
                           "FD"),
          align=TRUE, no.space=TRUE) # Replicates Table 2





# ROBUSTNESS TESTS

## Robustness test 1: Replace dependent variable (amount of UNGA draft co-sponsorship instead of S-Scores)
InOutAmSul.red7.Dr <- read.csv(file="InOutAmSul.red7.Dr.csv")

### TSCS
tscs.InOutAmSulDr <- pdata.frame(InOutAmSul.red7.Dr, index = c("Partner.ISO", "Year"))
#### Pooled OLS with PCSE
tscs.InOutAmSulDr.n.pool.l2 <- plm(tscsForm32, data=tscs.InOutAmSulDr, model = "pooling")
tscs.InOutAmSulDr.n.pool.l1 <- plm(tscsForm33, data=tscs.InOutAmSulDr, model = "pooling")
tscs.InOutAmSulDr.n.pool <- plm(tscsForm34, data=tscs.InOutAmSulDr, model = "pooling")

##### Auto-correlation tests
pbgtest(tscs.InOutAmSulDr.n.pool.l2) # none
pdwtest(tscs.InOutAmSulDr.n.pool.l2) # none
pbgtest(tscs.InOutAmSulDr.n.pool.l1) # corr
pdwtest(tscs.InOutAmSulDr.n.pool.l1) # none

#### FE twoways
tscs.InOutAmSulDr.n.fe.l2.tw <- plm(tscsForm32, data=tscs.InOutAmSulDr, model = "within", effect="twoways")

#### Pooled OLS vs FE
pFtest(tscs.InOutAmSulDr.n.fe.l2.tw, tscs.InOutAmSulDr.n.pool.l2)
plmtest(tscs.InOutAmSulDr.n.pool.l2, effect = "twoways", type = "bp") # keep FE twoways

stargazer(
  tscs.InOutAmSulDr.n.fe.l2.tw,
  type="text", title="Resultados regressão",
  decimal.mark=",", digit.separator=".",
  column.labels =c(
    "FE (two w.)"),
  align=TRUE, no.space=TRUE) # replicates Annex p.8 table




## Robustness test 2: Increase sample size (Latin America)
InOutAmLat2 <- read.csv(file="InOutAmLat2.csv")

### TSCS
tscs.InOutAmLat <- pdata.frame(InOutAmLat2, index=c("Partner.PT", "Year"))

#### Pooled OLS lag 1+2 with dummies
tscs.InOutAmLat.pool.l12.c.d <- plm(tscsForm28, data=tscs.InOutAmLat, model = "pooling")
#### Compare results South vs Latin America
stargazer(tscs.InOutAmSul.pool.l12.c.d,
          tscs.InOutAmLat.pool.l12.c.d,
          column.labels = c("Base Am. Sul", "Base Am. Lat"),
          type="text", align = T, decimal.mark = ",", digit.separator = ".")
#### with PCSE
tscs.InOutAmLat.pool.l12.c.d.pcse <- coeftest(tscs.InOutAmLat.pool.l12.c.d,
                                              vcov.=function(x) vcovBK(x, type="HC0",
                                                                       cluster="time"))
# tscs.InOutAmSul.pool.l12.c.d.pcse2 <- coeftest(tscs.InOutAmSul.pool.l12.c.d,
#                                                vcov.=function(x) vcovBK(x, type = c("HC0"),
#                                                                         cluster="time"))
stargazer(tscs.InOutAmSul.pool.l12.c.d,
          tscs.InOutAmSul.pool.l12.c.d.pcse,
          tscs.InOutAmLat.pool.l12.c.d,
          tscs.InOutAmLat.pool.l12.c.d.pcse,
          title = "Base Am. Sul vs. Am. Lat.",
          column.labels = c("OLS Am. Sul", "OLS PCSE Am. Sul.", "OLS Am. Lat.", "OLS PCSE Am. Lat"),
          type="text", align = T, decimal.mark = ",", digit.separator = ".") # replicates Annex p.13 table



# BETA COEFFICIENTS
InOutAmLat2sc <- InOutAmLat2[,c(6:22)] %>%
  mutate_all(funs(scale))
summary(InOutAmLat2sc)
InOutAmLat2sc <- cbind(InOutAmLat2sc, InOutAmLat2[,c(2,3)])
tscs.InOutAmLat.sc <- pdata.frame(InOutAmLat2sc, index=c("Partner.PT", "Year")) # calculate z-score for all variables
names(tscs.InOutAmLat.sc)
tscs.InOutAmLat.pool.l12.c.d.sc <- plm(formula = tscsForm28, data = tscs.InOutAmLat.sc, model = "pooling")
tscs.InOutAmLat.pool.l12.c.d.sc.man <- lm(formula = tscsForm28, data = InOutAmLat2sc)
tscs.InOutAmLat.pool.l12.c.d.sc.man.coef <- tidy(tscs.InOutAmLat.pool.l12.c.d.sc.man, conf.int = T)
colnames(tscs.InOutAmLat.pool.l12.c.d.sc.man.coef) <- c("term", "estimate.man", "std.error.man", "statistic.man", "p.value.man", "ci25.man", "ci95.man")
tscs.InOutAmLat.pool.l12.c.d.sc.pcse <- coeftest(tscs.InOutAmLat.pool.l12.c.d.sc,
                                                 vcov.=function(x) vcovBK(x, type="HC0",
                                                                          cluster="time")) # with PCSE
tscs.InOutAmLat.pool.l12.c.d.sc.pcse.coef <- tidy(tscs.InOutAmLat.pool.l12.c.d.sc.pcse, conf.int = T)
colnames(tscs.InOutAmLat.pool.l12.c.d.sc.pcse.coef) <- c("term", "estimate.pcse", "std.error.pcse", "statistic.pcse", "p.value.pcse", "ci25.pcse", "ci95.pcse")
tscs.InOutAmLat.pool.l12.c.d.sc.coef <- merge(tscs.InOutAmLat.pool.l12.c.d.sc.pcse.coef, tscs.InOutAmLat.pool.l12.c.d.sc.man.coef, by="term", all=T)
tscs.InOutAmLat.pool.l12.c.d.sc.coef$term <- c("Intercepto", "Arg.95", "Gap de poder", "Gap democr.", "Dist. ideol.", "Dep. comerc. BR (log)", "IAD (%, log)", "Com. pot. extrarreg. (log)", "AOD pot. extrarreg.", "Estab. pol.", "Estab. pol. BR", "Par.13", "Am. Sul", "LDV1", "LDV2", "Sur.15", "t")

ggplot(tscs.InOutAmLat.pool.l12.c.d.sc.coef, aes(x=reorder(term,abs(estimate.man)) , y=estimate.man))+
  geom_pointrange(aes(ymin=ci25.man, ymax=ci95.man, color="OLS"), size=2.5, fatten=2)+
  geom_pointrange(aes(ymin=ci25.pcse, ymax=ci95.pcse, color="OLS c/ PCSE"), size=1, fatten=0.1)+
  geom_hline(yintercept=0)+
  scale_color_manual(values = c("black", "red"))+
  labs(y="Coeficiente beta", x="", color="Intervalo \nde confiança")+
  theme(legend.key.size = unit(1, "cm"))+
  coord_flip() # replicates Annex p.14 figure



